# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from six import StringIO
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
# Load libraries
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier #bagging
import pickle
import joblib
from sklearn.metrics import classification_report, confusion_matrix
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.express as px
from plotly.subplots import make_subplots
from plotly import tools
from sklearn.metrics import precision_recall_fscore_support
def reading_csv(filename):
    """Load a CSV file into a pandas DataFrame.

    Args:
        filename: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(filename)
def extract_features(df, n_features=7):
    """Return the leading feature columns of *df* as a NumPy array.

    Args:
        df: DataFrame whose first ``n_features`` columns hold the features.
        n_features: How many leading columns to take. Defaults to 7, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        ``df.iloc[:, :n_features].values`` - a 2-D array with one row per
        DataFrame row.
    """
    return df.iloc[:, :n_features].values
def extract_target(df, target_col=7):
    """Return the target column of *df* as a 1-D NumPy array.

    Args:
        df: DataFrame that contains the target column.
        target_col: Positional index of the target column. Defaults to 7,
            the value that used to be hard-coded; negative indices such as
            ``-1`` (last column) are accepted by ``iloc`` as well.

    Returns:
        ``df.iloc[:, target_col].values``.
    """
    return df.iloc[:, target_col].values
def split(X, Y, test_size=0.3, random_state=1):
    """Split features and targets into train and test partitions.

    Args:
        X: Feature matrix.
        Y: Target values aligned with the rows of ``X``.
        test_size: Fraction of the samples held out for testing. Defaults
            to 0.3 (70% training / 30% test), the previously fixed value.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible. Defaults to 1, the previously fixed value.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test
# Load each questionnaire dataset, pull out features/target and create the
# train/test partitions used by every model below.  The ``*_cols`` names hold
# every column label except the last one; the bare ``*_cols`` expressions are
# notebook-style echoes and do nothing when the file runs as a plain script.

# Depression
df_dep = reading_csv("../Datasets/Depression.csv")
X_dep, Y_dep = extract_features(df_dep), extract_target(df_dep)
(X_train_dep, X_test_dep,
 y_train_dep, y_test_dep) = split(X_dep, Y_dep)
dep_cols = df_dep.columns[:-1]
dep_cols

# Anxiety
df_anx = reading_csv("../Datasets/Anxiety.csv")
X_anx, Y_anx = extract_features(df_anx), extract_target(df_anx)
(X_train_anx, X_test_anx,
 y_train_anx, y_test_anx) = split(X_anx, Y_anx)
anx_cols = df_anx.columns[:-1]
anx_cols

# Stress
df_str = reading_csv("../Datasets/Stress.csv")
X_str, Y_str = extract_features(df_str), extract_target(df_str)
(X_train_str, X_test_str,
 y_train_str, y_test_str) = split(X_str, Y_str)
str_cols = df_str.columns[:-1]
str_cols
def save_model(model, filename):
    """Pickle *model* into the file at *filename*.

    Args:
        model: Any picklable object (here: a fitted estimator).
        filename: Destination path; the file is opened in binary write mode,
            so an existing file is overwritten.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(model, sink)
def load_model(filename):
    """Read back an object that was stored with ``pickle``.

    Args:
        filename: Path of the pickle file, opened in binary read mode.

    Returns:
        The unpickled object.

    Note:
        ``pickle.load`` can execute arbitrary code while deserializing, so
        only point this at files from a trusted source.
    """
    with open(filename, mode='rb') as source:
        return pickle.load(source)
def dump_joblib(model, filename):
    """Persist *model* to *filename* using ``joblib.dump``.

    Args:
        model: Object to store.
        filename: Destination handed to ``joblib.dump``.

    Returns:
        None; the value returned by ``joblib.dump`` is discarded.
    """
    joblib.dump(value=model, filename=filename)
def KNN(X_train, X_test, y_train, y_test=None, n_neighbors=5):
    """Fit a k-nearest-neighbours classifier and predict the test samples.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.
        y_test: Unused. Kept (now optional) only so existing four-argument
            calls keep working; the sibling trainers take no test labels.
        n_neighbors: Number of neighbours consulted per prediction.
            Defaults to 5, the previously hard-coded value.

    Returns:
        ``(classifier, y_pred)`` - the fitted ``KNeighborsClassifier`` and
        its predictions for ``X_test``.
    """
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return classifier, y_pred
# K-nearest-neighbours: train on each dataset and score the held-out split.
# The commented ``save_model`` lines show how each fitted model was pickled.

# Depression
model, y_pred = KNN(X_train_dep, X_test_dep, y_train_dep, y_test_dep)
# save_model(model, "models/dep_model_knn.pkl")
knn_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
knn_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = KNN(X_train_anx, X_test_anx, y_train_anx, y_test_anx)
# save_model(model, "models/anx_model_knn.pkl")
knn_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
knn_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = KNN(X_train_str, X_test_str, y_train_str, y_test_str)
# save_model(model, "models/str_model_knn.pkl")
knn_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
knn_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)

# Smoke-test the pickled depression model on one test sample; indexing with
# the list [4] keeps that row two-dimensional, as ``predict`` expects.
# NOTE(review): the commented save path is "models/..." while this load uses
# "../models/..." - confirm which directory is intended.
model = load_model("../models/dep_model_knn.pkl")
y_pred = model.predict(X_test_dep[[4]])
def GNB(X_train, X_test, y_train, y_test):
    """Fit a Gaussian naive Bayes classifier and predict the test samples.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.
        y_test: Accepted for call-site symmetry; not used by this function.

    Returns:
        ``(model, predictions)`` - the fitted ``GaussianNB`` and its
        predictions for ``X_test``.
    """
    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)
    return nb_model, nb_model.predict(X_test)
# Gaussian naive Bayes: train on each dataset and score the held-out split.
# The commented ``save_model`` lines show how each fitted model was pickled.

# Depression
model, y_pred = GNB(X_train_dep, X_test_dep, y_train_dep, y_test_dep)
# save_model(model, "models/dep_model_gnb.pkl")
gnb_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
gnb_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = GNB(X_train_anx, X_test_anx, y_train_anx, y_test_anx)
# save_model(model, "models/anx_model_gnb.pkl")
gnb_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
gnb_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = GNB(X_train_str, X_test_str, y_train_str, y_test_str)
# save_model(model, "models/str_model_gnb.pkl")
gnb_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
gnb_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
def DTree(criteria, X_train, X_test, y_train, max_depth=10, random_state=None):
    """Fit a decision tree classifier and predict the test samples.

    Args:
        criteria: Split-quality criterion forwarded as ``criterion``
            (the callers in this file use ``'gini'`` and ``'entropy'``).
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.
        max_depth: Maximum tree depth. Defaults to 10, the previously
            hard-coded value.
        random_state: Optional seed for the tree builder. Defaults to
            ``None``, which matches the previous (unseeded) behaviour; pass
            an int to make repeated runs produce the same tree.

    Returns:
        ``(cls, y_pred)`` - the fitted ``DecisionTreeClassifier`` and its
        predictions for ``X_test``.
    """
    cls = DecisionTreeClassifier(
        criterion=criteria,
        max_depth=max_depth,
        splitter='best',
        random_state=random_state,
    )
    cls.fit(X_train, y_train)
    y_pred = cls.predict(X_test)
    return cls, y_pred
# Depression - decision tree (Gini): train, score, then render the tree.
model, y_pred = DTree('gini', X_train_dep, X_test_dep, y_train_dep)
# save_model(model, "models/dep_model_dt.pkl")
dt_acc_dep = accuracy_score(y_test_dep, y_pred)
dt_cls_dep = classification_report(y_test_dep, y_pred)

# Export the fitted tree as Graphviz DOT text, then rasterize it to a PNG.
dot_data = StringIO()
export_graphviz(
    model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # Hand over a plain list: some scikit-learn releases reject a pandas
    # Index here, while a list is accepted by all of them.
    feature_names=list(dep_cols),
    # NOTE(review): assumes the target has five classes labelled 0-4 - confirm.
    class_names=['0', '1', '2', '3', '4'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('depression.png')
Image(graph.create_png())
# Anxiety - decision tree (Gini): train, score, then render the tree.
model, y_pred = DTree('gini', X_train_anx, X_test_anx, y_train_anx)
# save_model(model, "models/anx_model_dt.pkl")
dt_acc_anx = accuracy_score(y_test_anx, y_pred)
dt_cls_anx = classification_report(y_test_anx, y_pred)

# Export the fitted tree as Graphviz DOT text, then rasterize it to a PNG.
dot_data = StringIO()
export_graphviz(
    model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # Hand over a plain list: some scikit-learn releases reject a pandas
    # Index here, while a list is accepted by all of them.
    feature_names=list(anx_cols),
    # NOTE(review): assumes the target has five classes labelled 0-4 - confirm.
    class_names=['0', '1', '2', '3', '4'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('anxiety.png')
Image(graph.create_png())
# Stress - decision tree (Gini): train, score, then render the tree.
model, y_pred = DTree('gini', X_train_str, X_test_str, y_train_str)
# save_model(model, "models/str_model_dt.pkl")
dt_acc_str = accuracy_score(y_test_str, y_pred)
dt_cls_str = classification_report(y_test_str, y_pred)

# Export the fitted tree as Graphviz DOT text, then rasterize it to a PNG.
dot_data = StringIO()
export_graphviz(
    model,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # Hand over a plain list: some scikit-learn releases reject a pandas
    # Index here, while a list is accepted by all of them.
    feature_names=list(str_cols),
    # NOTE(review): assumes the target has five classes labelled 0-4 - confirm.
    class_names=['0', '1', '2', '3', '4'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('stress.png')
Image(graph.create_png())
# Decision tree with the entropy criterion: same procedure as the Gini runs,
# but the fitted trees are neither saved nor rendered.

# Depression
model, y_pred = DTree('entropy', X_train_dep, X_test_dep, y_train_dep)
dt_ent_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
dt_ent_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = DTree('entropy', X_train_anx, X_test_anx, y_train_anx)
dt_ent_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
dt_ent_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = DTree('entropy', X_train_str, X_test_str, y_train_str)
dt_ent_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
dt_ent_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
def RForest(X_train, X_test, y_train):
    """Fit a 50-tree random forest (seed 2) and predict the test samples.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.

    Returns:
        ``(forest, predictions)`` - the fitted ``RandomForestClassifier``
        and its predictions for ``X_test``.
    """
    forest = RandomForestClassifier(random_state=2, n_estimators=50)
    forest.fit(X_train, y_train)
    return forest, forest.predict(X_test)
# Random forest: train on each dataset and score the held-out split.
# The commented ``save_model`` lines show how each fitted model can be pickled.

# Depression
model, y_pred = RForest(X_train_dep, X_test_dep, y_train_dep)
# save_model(model, "models/dep_model_rf.pkl")
rf_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
rf_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = RForest(X_train_anx, X_test_anx, y_train_anx)
# save_model(model, "models/anx_model_rf.pkl")
rf_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
rf_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = RForest(X_train_str, X_test_str, y_train_str)
# save_model(model, "models/str_model_rf.pkl")  # was "..._dt.pkl", which would overwrite the decision-tree pickle
rf_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
rf_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
from sklearn import svm
def SVM(X_train, X_test, y_train):
    """Fit a linear-kernel support vector classifier and predict the test set.

    The classifier is created with ``probability=True``.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.

    Returns:
        ``(machine, predictions)`` - the fitted ``svm.SVC`` and its
        predictions for ``X_test``.
    """
    machine = svm.SVC(kernel='linear', probability=True)
    machine.fit(X_train, y_train)
    return machine, machine.predict(X_test)
# Support vector machine: train on each dataset and score the held-out split.
# The commented ``save_model`` lines show how each fitted model was pickled.

# Depression
model, y_pred = SVM(X_train_dep, X_test_dep, y_train_dep)
# save_model(model, "models/dep_model_svm.pkl")
svm_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
svm_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = SVM(X_train_anx, X_test_anx, y_train_anx)
# save_model(model, "models/anx_model_svm.pkl")
svm_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
svm_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = SVM(X_train_str, X_test_str, y_train_str)
# save_model(model, "models/str_model_svm.pkl")
svm_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
svm_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
def bagging(X_train, X_test, y_train):
    """Fit a voting ensemble of KNN, decision tree and linear SVC.

    Despite the function name, the ensemble is a ``VotingClassifier`` built
    with its default settings over three different base models, not a
    bootstrap-aggregating (``BaggingClassifier``) model.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.

    Returns:
        ``(ensemble, y_pred)`` - the fitted ``VotingClassifier`` and its
        predictions for ``X_test``.
    """
    members = [
        ('KNN', KNeighborsClassifier(n_neighbors=3)),
        ('cart', DecisionTreeClassifier()),
        ('svm', SVC(probability=True, kernel='linear')),
    ]
    ensemble = VotingClassifier(members)
    # ``fit`` returns the estimator itself, so predicting through ``ensemble``
    # is the same as predicting through the value ``fit`` hands back.
    ensemble.fit(X_train, y_train)
    return ensemble, ensemble.predict(X_test)
# Voting ensemble (the ``bagging`` helper): train on each dataset and score
# the held-out split.  The commented ``save_model`` lines show how each fitted
# model was pickled.

# Depression
model, y_pred = bagging(X_train_dep, X_test_dep, y_train_dep)
# save_model(model, "models/dep_model_ens.pkl")
ens_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
ens_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

# Anxiety
model, y_pred = bagging(X_train_anx, X_test_anx, y_train_anx)
# save_model(model, "models/anx_model_ens.pkl")
ens_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
ens_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

# Stress
model, y_pred = bagging(X_train_str, X_test_str, y_train_str)
# save_model(model, "models/str_model_ens.pkl")
ens_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
ens_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
def AdaBoost(X_train, X_test, y_train):
    """Fit AdaBoost with linear-kernel SVC base learners and predict X_test.

    Ten boosting rounds are used.  The base learner is an ``SVC`` created
    with ``probability=True``.

    Args:
        X_train: Training feature matrix.
        X_test: Feature matrix to predict.
        y_train: Training labels.

    Returns:
        ``(abc, y_pred)`` - the fitted ``AdaBoostClassifier`` and its
        predictions for ``X_test``.
    """
    svc = SVC(probability=True, kernel='linear')
    try:
        # Current scikit-learn names the base-learner argument ``estimator``;
        # the old ``base_estimator`` keyword was deprecated in 1.2 and later
        # removed, so using it there raises TypeError.
        abc = AdaBoostClassifier(n_estimators=10, estimator=svc)
    except TypeError:
        # Releases older than 1.2 only understand ``base_estimator``.
        abc = AdaBoostClassifier(n_estimators=10, base_estimator=svc)
    abc.fit(X_train, y_train)
    y_pred = abc.predict(X_test)
    return abc, y_pred
# AdaBoost: train on each dataset and score the held-out split.  The commented
# lines show how each model was stored (pickle and joblib variants).  The bare
# ``ada_acc_*`` expressions are notebook-style echoes with no effect in a script.

# Depression
model, y_pred = AdaBoost(X_train_dep, X_test_dep, y_train_dep)
# save_model(model, "models/dep_model_ada.pkl")
ada_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
ada_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)
# dump_joblib(model, "models/dep_model_ada.joblib")
ada_acc_dep

# Anxiety
model, y_pred = AdaBoost(X_train_anx, X_test_anx, y_train_anx)
# save_model(model, "models/anx_model_ada.pkl")
ada_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
ada_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)
# dump_joblib(model, "models/anx_model_ada.joblib")
ada_acc_anx

# Stress
model, y_pred = AdaBoost(X_train_str, X_test_str, y_train_str)
# save_model(model, "models/str_model_ada.pkl")
ada_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
ada_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
# dump_joblib(model, "models/str_model_ada.joblib")
ada_acc_str

# Re-score with the pickled AdaBoost models.  These assignments overwrite the
# ``ada_acc_*`` / ``ada_cls_*`` values computed from the freshly trained
# models above, so the summaries further down report the pickled models.
model = load_model("../models/dep_model_ada.pkl")
y_pred = model.predict(X_test_dep)
ada_acc_dep = accuracy_score(y_true=y_test_dep, y_pred=y_pred)
ada_cls_dep = classification_report(y_true=y_test_dep, y_pred=y_pred)

model = load_model("../models/anx_model_ada.pkl")
y_pred = model.predict(X_test_anx)
ada_acc_anx = accuracy_score(y_true=y_test_anx, y_pred=y_pred)
ada_cls_anx = classification_report(y_true=y_test_anx, y_pred=y_pred)

model = load_model("../models/str_model_ada.pkl")
y_pred = model.predict(X_test_str)
ada_acc_str = accuracy_score(y_true=y_test_str, y_pred=y_pred)
ada_cls_str = classification_report(y_true=y_test_str, y_pred=y_pred)
# Print the test accuracy of every algorithm: three lines (depression,
# anxiety, stress) per algorithm, with a blank separator between algorithms.
_accuracy_summary = (
    ("KNN", knn_acc_dep, knn_acc_anx, knn_acc_str),
    ("Naive Bayes", gnb_acc_dep, gnb_acc_anx, gnb_acc_str),
    ("Decision Tree using Gini", dt_acc_dep, dt_acc_anx, dt_acc_str),
    ("Decision Tree using Entropy", dt_ent_acc_dep, dt_ent_acc_anx, dt_ent_acc_str),
    ("Random Forest", rf_acc_dep, rf_acc_anx, rf_acc_str),
    ("SVM", svm_acc_dep, svm_acc_anx, svm_acc_str),
    ("Bagging Method", ens_acc_dep, ens_acc_anx, ens_acc_str),
    ("Adaboosting", ada_acc_dep, ada_acc_anx, ada_acc_str),
)
for _position, (_algorithm, _dep_acc, _anx_acc, _str_acc) in enumerate(_accuracy_summary):
    # The separator goes between groups only, never before the first one.
    if _position:
        print("\n\n")
    print(f"Depression accuracy for {_algorithm}: ", _dep_acc)
    print(f"Anxiety accuracy for {_algorithm}: ", _anx_acc)
    print(f"Stress accuracy for {_algorithm}: ", _str_acc)
# Print the classification report of every algorithm for the three datasets,
# grouped per algorithm with a blank separator between groups.
print("Depression Classification Report for KNN: \n", knn_cls_dep)
print("Anxiety Classification Report for KNN:\n ", knn_cls_anx)
print("Stress Classification Report for KNN:\n ", knn_cls_str)
print("\n\n")
print("Depression Classification Report for Naive Bayes: \n", gnb_cls_dep)
print("Anxiety Classification Report for Naive Bayes: \n", gnb_cls_anx)
print("Stress Classification Report for Naive Bayes: \n", gnb_cls_str)
print("\n\n")
print("Depression Classification Report for Decision Tree using Gini: \n", dt_cls_dep)
print("Anxiety Classification Report for Decision Tree using Gini: \n", dt_cls_anx)
print("Stress Classification Report for Decision Tree using Gini: \n", dt_cls_str)
print("\n\n")
print("Depression Classification Report for Decision Tree using Entropy: \n", dt_ent_cls_dep)
print("Anxiety Classification Report for Decision Tree using Entropy: \n", dt_ent_cls_anx)
print("Stress Classification Report for Decision Tree using Entropy: \n", dt_ent_cls_str)
print("\n\n")
print("Depression Classification Report for Random Forest: \n", rf_cls_dep)
print("Anxiety Classification Report for Random Forest:\n ", rf_cls_anx)
print("Stress Classification Report for Random Forest: \n", rf_cls_str)
print("\n\n")
print("Depression Classification Report for SVM:\n ", svm_cls_dep)
print("Anxiety Classification Report for SVM:\n ", svm_cls_anx)
# Fixed: this line used to print the KNN stress report under the SVM heading.
print("Stress Classification Report for SVM: \n", svm_cls_str)
print("\n\n")
print("Depression Classification Report for Bagging Method: \n", ens_cls_dep)
print("Anxiety Classification Report for Bagging Method:\n ", ens_cls_anx)
print("Stress Classification Report for Bagging Method: \n", ens_cls_str)
print("\n\n")
print("Depression Classification Report for Adaboosting: \n", ada_cls_dep)
print("Anxiety Classification Report for Adaboosting: \n", ada_cls_anx)
print("Stress Classification Report for Adaboosting: \n", ada_cls_str)
# Gather every model's test accuracy into one table (one row per algorithm,
# one column per dataset), save it, and draw a grouped bar chart.
x = ['KNN', 'GaussianNB', 'Decision Tree(Gini)', 'Decision Tree(Entropy)',
     'Random Forest', 'SVM', 'Bagging', 'Adaboost']
ydep = [knn_acc_dep, gnb_acc_dep, dt_acc_dep, dt_ent_acc_dep,
        rf_acc_dep, svm_acc_dep, ens_acc_dep, ada_acc_dep]
yanx = [knn_acc_anx, gnb_acc_anx, dt_acc_anx, dt_ent_acc_anx,
        rf_acc_anx, svm_acc_anx, ens_acc_anx, ada_acc_anx]
ystr = [knn_acc_str, gnb_acc_str, dt_acc_str, dt_ent_acc_str,
        rf_acc_str, svm_acc_str, ens_acc_str, ada_acc_str]
data = {'Algorithms': x,
        'DepressionAccuracy': ydep,
        'AnxietyAccuracy': yanx,
        'StressAccuracy': ystr}
df_acc = pd.DataFrame(data)
df_acc.head()
df_acc.to_csv('Accuracy.csv', index=False)
# NOTE(review): the table is written to ./Accuracy.csv but re-read from
# ../Datasets/Accuracy.csv, so the chart shows whatever that other file holds.
# Confirm whether the two paths are meant to differ.
df_acc = reading_csv('../Datasets/Accuracy.csv')
colors = ['#274472', '#5885AF', '#C3E0E5', '#007e79']
fig = go.Figure(data=[
    go.Bar(name='Depression', x=df_acc['Algorithms'],
           y=df_acc['DepressionAccuracy'], marker_color=colors[0]),
    # Fixed: the 'Stress' and 'Anxiety' legend names were swapped relative to
    # the columns they plot; data order and colours are unchanged.
    go.Bar(name='Anxiety', x=df_acc['Algorithms'],
           y=df_acc['AnxietyAccuracy'], marker_color=colors[1]),
    go.Bar(name='Stress', x=df_acc['Algorithms'],
           y=df_acc['StressAccuracy'], marker_color=colors[2]),
])
# Place the three bars of each algorithm side by side.
fig.update_layout(barmode='group')
fig.write_image("images/acc.png")
fig.show()